#Dennis Moskov, Master Thesis
#CT for model fitness
#reduced model
#conversion, selectivity and yield

#using "rpart" package
#install.packages("rpart")
#library(rpart)

#packages for fancy plot
#install.packages("rattle")
#install.packages("rpart.plot")
#library(rattle)
#library(rpart.plot)


#put the rows of DB into a random order
set.seed(77)                      # fixed seed so the shuffle is reproducible
shuffled.rows<-sample(nrow(DB))
DBt<-DB[shuffled.rows,]

#lookup table for the three outcomes, one column per outcome:
#  row 1: label used in plot titles and output file names
#  row 2: short outcome code (presumably the matching column name in DB - TODO confirm)
#  row 3: position of the outcome column, taken as one of the last three columns of DBt
#everything is stored as character because the rows are combined into one matrix
outcome.labels<-c("Conversion","Selectivity","Yield")
outcome.codes<-c("X.MeOH","S.MeOH","Y.MeOH")
n.columns<-length(DBt)
outcome.positions<-c(n.columns-2,n.columns-1,n.columns)
results<-matrix(c(outcome.labels,outcome.codes,outcome.positions),nrow=3,byrow=TRUE)

#loop through different outcomes
#For every outcome column listed in `results` this loop
#  1. fits a classification tree on all remaining predictors,
#  2. refits it using only the variables that received an importance score,
#  3. prunes the refitted tree at the CP value with the lowest "xerror",
#  4. predicts the classes of the same data and builds a confusion matrix,
#  5. writes confusion matrix, used variables, variable importance, rules and
#     the tree plot to files in the working directory.
#Needs rpart (rpart, prune) and rattle (fancyRpartPlot, asRules) to be loaded
#by the caller, and DBt / results to exist.
for (r in seq_len(ncol(results))) {

  #use desired outcome: drop column 1 and the two outcome columns that are not
  #analysed in this pass, so the analysed outcome ends up as the last column
  other.outcomes<-as.numeric(results[3,-r])
  useDB<-DBt[-c(1,other.outcomes)]
  outcome<-names(useDB)[length(useDB)]
  predictors<-names(useDB)[-length(useDB)]

  #list collecting observed classes, row labels and fitted classes
  mattr<-list()
  mattr$observed<-as.character(useDB[,length(useDB)])

  #grow tree for model fitness
  form<-as.formula(paste(outcome, "~", paste(predictors, collapse=" + ")))
  fit<-rpart(form, data=useDB, method="class")

  #a tree without any split carries no variable importance; the refit formula
  #would be empty, so skip this outcome instead of failing with an unclear error
  important<-names(fit$variable.importance)
  if (length(important)==0) {
    warning("No split found for ", results[1,r], "; outcome skipped", call.=FALSE)
    next
  }

  #use only important variables
  #(accessed by name: the position of this component inside the rpart object
  #depends on which optional components are present)
  form<-as.formula(paste(outcome, "~", paste(important, collapse=" + ")))
  fit<-rpart(form, data=useDB, method="class")

  #prune tree at the complexity parameter with the smallest "xerror"
  best.cp<-fit$cptable[which.min(fit$cptable[,"xerror"]),"CP"]
  pfit<-prune(fit, cp=best.cp)

  #predict and save most probable classes of DBt
  predtr<-predict(object=pfit, newdata=useDB, type="class")

  #save to mattr list
  mattr$number<-as.numeric(names(predtr))
  mattr$fitted<-as.character(unname(predtr))

  #confusion matrix for train data
  #both margins share the same class levels so the table is always square;
  #otherwise a class that is never predicted would shift the diagonal and the
  #error below would compare unrelated classes
  classes<-sort(unique(c(mattr$fitted,mattr$observed)))
  confutr<-table(factor(mattr$fitted,levels=classes),factor(mattr$observed,levels=classes))

  #misclassification error for train data
  misstr<-(sum(confutr)-sum(diag(confutr)))/sum(confutr)

  #save used variables (split variables of the refitted, unpruned tree)
  #"<leaf>" is removed by name, which works for a factor or character column
  tvar<-setdiff(levels(factor(fit$frame$var)),"<leaf>")

  #save variable importance of the pruned tree
  #kept as a one-element list so the names written to the csv keep the
  #"variable.importance." prefix
  vi<-pfit["variable.importance"]

  #title and subtitle shared by the on-screen plot and the pdf
  tree.title<-paste("Decision Tree for Classification of MeOH",results[1,r])
  tree.sub<-paste("Missclassification Error for Fitted Classes: ",round(misstr,digits=4))

  #plot classification tree on screen
  x11()
  fancyRpartPlot(pfit,main=tree.title,sub=tree.sub)

  write.csv(confutr, file =paste(results[1,r]," confusionFit.csv"))
  write.csv(tvar,file=paste(results[1,r]," Variables_Tree_Construction.csv"))
  write.csv(cbind(names(unlist(vi)),unlist(vi)), file =(paste(results[1,r]," variable_importance.csv")))

  capture.output(asRules(pfit), file = paste(results[1,r]," Rules.txt"))

  #save classification tree to file
  #the device is closed even if plotting fails, so no pdf device is left open
  pdf(paste(results[1,r]," Tree.pdf"))
  tryCatch(
    fancyRpartPlot(pfit,main=tree.title,sub=tree.sub),
    finally = dev.off()
  )

}


